"""
Phase 5: Referee Response Analyses — Sovereign Spreads
=======================================================
Addresses six referee comments:
  A. Standardized Z effects (SD-unit interpretation)
  B. Leave-one-out ratings robustness
  C. GIIPS influence diagnostics for spread regressions
  D. Country FE + Year FE robustness
  E. Proper mediation / suppression framing
  F. Structural break clarification (Sup-F vs Chow)

Output: output/tables/referee_*.md
"""

import sys
from pathlib import Path

import numpy as np
import pandas as pd
from scipy import stats

PROJECT_DIR = Path("/mnt/c/demographics_capital_flows/sovereign_spreads")
ROOT_DIR = PROJECT_DIR.parent
sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS

PROCESSED_DIR = PROJECT_DIR / "data" / "processed"
TABLES_DIR = PROJECT_DIR / "output" / "tables"


def stars(p):
    if p < 0.01: return '***'
    if p < 0.05: return '**'
    if p < 0.10: return '*'
    return ''


def run_gls(df, dep_var, regressors, silent=True):
    """Run PanelGLS, return (model, sub_df) or (None, None)."""
    regressors = [r for r in regressors if r in df.columns]
    sub = df.dropna(subset=[dep_var] + regressors).copy()
    if len(sub) < 50:
        return None, None
    gls = PanelGLS()
    gls.fit(sub[dep_var].values, sub[regressors].values,
            sub['iso3'].values, sub['year'].values)
    gls.feature_names = regressors
    if not silent:
        gls.summary(feature_names=regressors)
    return gls, sub


# ═══════════════════════════════════════════════════════════════════════════
# LOAD DATA
# ═══════════════════════════════════════════════════════════════════════════
print("=" * 70)
print("PHASE 5: Referee Response Analyses")
print("=" * 70)

df = pd.read_csv(PROCESSED_DIR / "spread_panel.csv")
df = df[df['year'] <= 2024].copy()
print(f"Panel loaded: {df['iso3'].nunique()} countries, {len(df):,} obs, "
      f"{df['year'].min()}-{df['year'].max()}")
print(f"Columns: {list(df.columns[:20])} ... ({len(df.columns)} total)")

demo_vars = ['Z_1', 'Z_2', 'Z_3']
controls_rating = ['rgdp_growth', 'fiscal_bal_gdp', 'kaopen']
controls_spread = ['rgdp_growth', 'fiscal_bal_gdp', 'kaopen', 'nfa_gdp_lag']


# ═══════════════════════════════════════════════════════════════════════════
# PART A: STANDARDIZED Z EFFECTS
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART A: Standardized Z₁/Z₂/Z₃ Effects (SD units)")
print("=" * 70)

# Standardize Z variables
for z in ['Z_1', 'Z_2', 'Z_3']:
    mean_z = df[z].mean()
    std_z = df[z].std()
    df[z + '_std'] = (df[z] - mean_z) / std_z
    print(f"  {z}: mean={mean_z:.3f}, std={std_z:.3f}")

std_demo = ['Z_1_std', 'Z_2_std', 'Z_3_std']

std_results = []

# A-1: Standardized Z → rating (with controls)
gls, sub = run_gls(df, 'rating_numeric', std_demo + controls_rating)
if gls:
    row = {'Model': 'Rating (std Z + controls)',
           'N': gls.n_obs, 'Countries': gls.n_countries, 'R²': gls.r_squared}
    for i, v in enumerate(std_demo + controls_rating):
        if i < len(gls.beta):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
    std_results.append(row)
    print(f"  Rating (std Z + controls): N={gls.n_obs}, R²={gls.r_squared:.4f}")
    for i, v in enumerate(std_demo):
        print(f"    {v}: {gls.beta[i]:.4f} ({gls.se[i]:.4f}) {stars(gls.pvalues[i])}")

# A-2: Standardized Z → rating (with controls + debt)
gls, sub = run_gls(df, 'rating_numeric', std_demo + controls_rating + ['govt_debt_gdp'])
if gls:
    row = {'Model': 'Rating (std Z + controls + debt)',
           'N': gls.n_obs, 'Countries': gls.n_countries, 'R²': gls.r_squared}
    regs = std_demo + controls_rating + ['govt_debt_gdp']
    for i, v in enumerate(regs):
        if i < len(gls.beta):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
    std_results.append(row)
    print(f"  Rating (std Z + controls + debt): N={gls.n_obs}, R²={gls.r_squared:.4f}")
    for i, v in enumerate(regs):
        if i < len(gls.beta):
            print(f"    {v}: {gls.beta[i]:.4f} ({gls.se[i]:.4f}) {stars(gls.pvalues[i])}")

# A-3: Standardized Z → sovereign spread
gls, sub = run_gls(df, 'sovereign_spread', std_demo + controls_spread)
if gls:
    row = {'Model': 'Spread (std Z + controls)',
           'N': gls.n_obs, 'Countries': gls.n_countries, 'R²': gls.r_squared}
    regs = std_demo + controls_spread
    for i, v in enumerate(regs):
        if i < len(gls.beta):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
    std_results.append(row)
    print(f"  Spread (std Z + controls): N={gls.n_obs}, R²={gls.r_squared:.4f}")
    for i, v in enumerate(regs):
        if i < len(gls.beta):
            print(f"    {v}: {gls.beta[i]:.4f} ({gls.se[i]:.4f}) {stars(gls.pvalues[i])}")

# A-4: Standardized Z → spread with debt interaction
df['Z_1_std_x_debt'] = df['Z_1_std'] * df['govt_debt_gdp']
gls, sub = run_gls(df, 'sovereign_spread',
                   std_demo + controls_spread + ['govt_debt_gdp', 'Z_1_std_x_debt'])
if gls:
    regs = std_demo + controls_spread + ['govt_debt_gdp', 'Z_1_std_x_debt']
    row = {'Model': 'Spread (std Z + debt interaction)',
           'N': gls.n_obs, 'Countries': gls.n_countries, 'R²': gls.r_squared}
    for i, v in enumerate(regs):
        if i < len(gls.beta):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
    std_results.append(row)
    print(f"  Spread (std Z + debt interaction): N={gls.n_obs}, R²={gls.r_squared:.4f}")
    for i, v in enumerate(regs):
        if i < len(gls.beta):
            print(f"    {v}: {gls.beta[i]:.4f} ({gls.se[i]:.4f}) {stars(gls.pvalues[i])}")

# Save Part A
md = ["# Standardized Demographic Effects (SD Units)\n"]
md.append("Interpretation: a 1-SD increase in Z_k is associated with X change in the dependent variable.\n")
md.append("| Model | N | Countries | R² | Z₁_std | | Z₂_std | | Z₃_std | |")
md.append("|---|---|---|---|---|---|---|---|---|---|")
for r in std_results:
    z1c = r.get('Z_1_std_coef', np.nan)
    z1p = r.get('Z_1_std_p', np.nan)
    z2c = r.get('Z_2_std_coef', np.nan)
    z2p = r.get('Z_2_std_p', np.nan)
    z3c = r.get('Z_3_std_coef', np.nan)
    z3p = r.get('Z_3_std_p', np.nan)
    md.append(f"| {r['Model']} | {r['N']:,} | {r['Countries']} | {r['R²']:.3f} "
              f"| {z1c:.3f} | {stars(z1p)} | {z2c:.3f} | {stars(z2p)} "
              f"| {z3c:.3f} | {stars(z3p)} |")

md.append("\n## Full Coefficient Table\n")
md.append("| Model | Variable | Coef | SE | p-value | Sig |")
md.append("|---|---|---|---|---|---|")
for r in std_results:
    for v in std_demo + controls_rating + controls_spread + ['govt_debt_gdp', 'Z_1_std_x_debt']:
        ck = f'{v}_coef'
        if ck in r:
            p = r[f'{v}_p']
            md.append(f"| {r['Model']} | {v} | {r[ck]:.4f} | {r[f'{v}_se']:.4f} "
                      f"| {p:.4f} | {stars(p)} |")

out = TABLES_DIR / "referee_standardized.md"
out.write_text('\n'.join(md))
print(f"\n  Saved: {out}")


# ═══════════════════════════════════════════════════════════════════════════
# PART B: LEAVE-ONE-OUT RATINGS ROBUSTNESS
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART B: Leave-One-Out Country Robustness — Rating Model")
print("=" * 70)

regs_b = demo_vars + controls_rating + ['govt_debt_gdp']
sub_b = df.dropna(subset=['rating_numeric'] + regs_b).copy()
countries = sorted(sub_b['iso3'].unique())
print(f"  Rating sample: {len(countries)} countries, {len(sub_b):,} obs")

# Full sample baseline
gls_full, _ = run_gls(sub_b, 'rating_numeric', regs_b)
z1_full = gls_full.beta[0]
z1_p_full = gls_full.pvalues[0]
print(f"  Full sample Z₁: {z1_full:.4f} (p={z1_p_full:.4f}) {stars(z1_p_full)}")

loo_results = []
for c in countries:
    sub_loo = sub_b[sub_b['iso3'] != c]
    gls_loo, _ = run_gls(sub_loo, 'rating_numeric', regs_b)
    if gls_loo:
        loo_results.append({
            'dropped': c,
            'Z_1_coef': gls_loo.beta[0],
            'Z_1_se': gls_loo.se[0],
            'Z_1_p': gls_loo.pvalues[0],
            'n_obs': gls_loo.n_obs,
            'n_countries': gls_loo.n_countries,
        })

loo_df = pd.DataFrame(loo_results)
z1_min = loo_df['Z_1_coef'].min()
z1_max = loo_df['Z_1_coef'].max()
z1_mean = loo_df['Z_1_coef'].mean()
all_sig = (loo_df['Z_1_p'] < 0.05).all()
n_sig = (loo_df['Z_1_p'] < 0.05).sum()
n_sig10 = (loo_df['Z_1_p'] < 0.10).sum()

print(f"  LOO Z₁ range: [{z1_min:.4f}, {z1_max:.4f}], mean={z1_mean:.4f}")
print(f"  Significant at 5%: {n_sig}/{len(loo_df)} ({100*n_sig/len(loo_df):.0f}%)")
print(f"  Significant at 10%: {n_sig10}/{len(loo_df)} ({100*n_sig10/len(loo_df):.0f}%)")

# Show the 10 most influential (largest change from full)
loo_df['change'] = loo_df['Z_1_coef'] - z1_full
loo_df['abs_change'] = loo_df['change'].abs()
loo_df = loo_df.sort_values('abs_change', ascending=False)
print(f"\n  10 most influential countries:")
for _, row in loo_df.head(10).iterrows():
    sig = stars(row['Z_1_p'])
    print(f"    Drop {row['dropped']}: Z₁={row['Z_1_coef']:.4f} (Δ={row['change']:+.4f}) "
          f"p={row['Z_1_p']:.4f} {sig}")

# Save Part B
md = ["# Leave-One-Out Robustness — Rating Model\n"]
md.append(f"Baseline Z₁ coefficient: {z1_full:.4f} (p={z1_p_full:.4f})\n")
md.append(f"- Z₁ range across LOO: [{z1_min:.4f}, {z1_max:.4f}]")
md.append(f"- Mean LOO Z₁: {z1_mean:.4f}")
md.append(f"- Significant at 5%: {n_sig}/{len(loo_df)} ({100*n_sig/len(loo_df):.0f}%)")
md.append(f"- Significant at 10%: {n_sig10}/{len(loo_df)} ({100*n_sig10/len(loo_df):.0f}%)\n")
md.append("## 10 Most Influential Countries\n")
md.append("| Dropped | Z₁ Coef | Change | p-value | Sig |")
md.append("|---|---|---|---|---|")
for _, row in loo_df.head(10).iterrows():
    md.append(f"| {row['dropped']} | {row['Z_1_coef']:.4f} | {row['change']:+.4f} "
              f"| {row['Z_1_p']:.4f} | {stars(row['Z_1_p'])} |")
md.append("\n## Full LOO Results\n")
md.append("| Dropped | Z₁ Coef | SE | p-value | Sig | N |")
md.append("|---|---|---|---|---|---|")
for _, row in loo_df.iterrows():
    md.append(f"| {row['dropped']} | {row['Z_1_coef']:.4f} | {row['Z_1_se']:.4f} "
              f"| {row['Z_1_p']:.4f} | {stars(row['Z_1_p'])} | {int(row['n_obs'])} |")

out = TABLES_DIR / "referee_ratings_loo.md"
out.write_text('\n'.join(md))
print(f"\n  Saved: {out}")


# ═══════════════════════════════════════════════════════════════════════════
# PART C: GIIPS INFLUENCE DIAGNOSTICS — Spread Regressions
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART C: GIIPS Influence Diagnostics — Spread Model")
print("=" * 70)

GIIPS = ['GRC', 'ITA', 'PRT', 'ESP', 'IRL']
EMU_19 = ['AUT', 'BEL', 'FIN', 'FRA', 'DEU', 'IRL', 'ITA', 'LUX', 'NLD',
           'PRT', 'ESP', 'GRC', 'SVN', 'CYP', 'MLT', 'SVK', 'EST', 'LVA', 'LTU']

regs_spread = demo_vars + controls_spread
influence_results = []

# Full sample
gls, sub = run_gls(df, 'sovereign_spread', regs_spread)
if gls:
    print(f"  Full sample: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}, N={gls.n_obs}")
    influence_results.append({
        'Sample': 'Full sample', 'N': gls.n_obs, 'Countries': gls.n_countries,
        'Z_1_coef': gls.beta[0], 'Z_1_se': gls.se[0], 'Z_1_p': gls.pvalues[0],
        'R²': gls.r_squared,
    })

# Drop individual GIIPS
for c in GIIPS:
    sub_drop = df[df['iso3'] != c]
    gls, _ = run_gls(sub_drop, 'sovereign_spread', regs_spread)
    if gls:
        print(f"  Drop {c}: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}, N={gls.n_obs}")
        influence_results.append({
            'Sample': f'Drop {c}', 'N': gls.n_obs, 'Countries': gls.n_countries,
            'Z_1_coef': gls.beta[0], 'Z_1_se': gls.se[0], 'Z_1_p': gls.pvalues[0],
            'R²': gls.r_squared,
        })

# Drop all GIIPS
sub_no_giips = df[~df['iso3'].isin(GIIPS)]
gls, _ = run_gls(sub_no_giips, 'sovereign_spread', regs_spread)
if gls:
    print(f"  Drop all GIIPS: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}, N={gls.n_obs}")
    influence_results.append({
        'Sample': 'Drop all GIIPS', 'N': gls.n_obs, 'Countries': gls.n_countries,
        'Z_1_coef': gls.beta[0], 'Z_1_se': gls.se[0], 'Z_1_p': gls.pvalues[0],
        'R²': gls.r_squared,
    })

# Drop all EMU
sub_no_emu = df[~df['iso3'].isin(EMU_19)]
gls, _ = run_gls(sub_no_emu, 'sovereign_spread', regs_spread)
if gls:
    print(f"  Drop all EMU: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}, N={gls.n_obs}")
    influence_results.append({
        'Sample': 'Drop all eurozone', 'N': gls.n_obs, 'Countries': gls.n_countries,
        'Z_1_coef': gls.beta[0], 'Z_1_se': gls.se[0], 'Z_1_p': gls.pvalues[0],
        'R²': gls.r_squared,
    })

# Rolling windows with and without GIIPS (10-year windows)
print("\n  Rolling windows: full vs ex-GIIPS (10yr)")
rolling_influence = []
for start_year in range(1990, 2016):
    end_year = start_year + 9
    window_full = df[(df['year'] >= start_year) & (df['year'] <= end_year)]
    window_no_giips = window_full[~window_full['iso3'].isin(GIIPS)]

    gls_f, _ = run_gls(window_full, 'sovereign_spread', regs_spread)
    gls_ng, _ = run_gls(window_no_giips, 'sovereign_spread', regs_spread)

    if gls_f and gls_ng:
        rolling_influence.append({
            'window': f'{start_year}-{end_year}',
            'Z1_full': gls_f.beta[0], 'p_full': gls_f.pvalues[0],
            'Z1_exGIIPS': gls_ng.beta[0], 'p_exGIIPS': gls_ng.pvalues[0],
        })
        sig_f = stars(gls_f.pvalues[0])
        sig_ng = stars(gls_ng.pvalues[0])
        print(f"    {start_year}-{end_year}: Full Z₁={gls_f.beta[0]:>8.2f} {sig_f:>3}  |  "
              f"ex-GIIPS Z₁={gls_ng.beta[0]:>8.2f} {sig_ng:>3}")

# Save Part C
md = ["# GIIPS Influence Diagnostics — Spread Model\n"]
md.append("## Baseline Spread Regression: Drop Individual GIIPS\n")
md.append("| Sample | N | Countries | Z₁ Coef | SE | p-value | Sig | R² |")
md.append("|---|---|---|---|---|---|---|---|")
for r in influence_results:
    md.append(f"| {r['Sample']} | {r['N']:,} | {r['Countries']} "
              f"| {r['Z_1_coef']:.4f} | {r['Z_1_se']:.4f} | {r['Z_1_p']:.4f} "
              f"| {stars(r['Z_1_p'])} | {r['R²']:.3f} |")

if rolling_influence:
    md.append("\n## Rolling 10-Year Windows: Full vs Ex-GIIPS\n")
    md.append("| Window | Z₁ (Full) | Sig | Z₁ (ex-GIIPS) | Sig |")
    md.append("|---|---|---|---|---|")
    for r in rolling_influence:
        md.append(f"| {r['window']} | {r['Z1_full']:.2f} | {stars(r['p_full'])} "
                  f"| {r['Z1_exGIIPS']:.2f} | {stars(r['p_exGIIPS'])} |")

out = TABLES_DIR / "referee_spread_influence.md"
out.write_text('\n'.join(md))
print(f"\n  Saved: {out}")


# ═══════════════════════════════════════════════════════════════════════════
# PART D: COUNTRY FE + YEAR FE ROBUSTNESS
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART D: Country FE + Year FE Robustness")
print("=" * 70)

fe_results = []

def run_fe_variants(df_in, dep_var, regressors, model_label):
    """Run baseline, +year dummies, within+year dummies."""
    results = []
    regs = [r for r in regressors if r in df_in.columns]
    sub = df_in.dropna(subset=[dep_var] + regs).copy()
    if len(sub) < 50:
        print(f"  [{model_label}] Insufficient obs — skipping")
        return results

    # (1) Baseline PanelGLS
    gls, _ = run_gls(sub, dep_var, regs)
    if gls:
        row = {'Model': f'{model_label}: Baseline', 'N': gls.n_obs,
               'Countries': gls.n_countries, 'R²': gls.r_squared}
        for i, v in enumerate(regs):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
        results.append(row)
        print(f"  {model_label} Baseline: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}")

    # (2) + Year dummies
    year_dummies = pd.get_dummies(sub['year'], prefix='yr', drop_first=True, dtype=float)
    yr_cols = list(year_dummies.columns)
    sub_yr = pd.concat([sub.reset_index(drop=True), year_dummies.reset_index(drop=True)], axis=1)
    regs_yr = regs + yr_cols

    gls, _ = run_gls(sub_yr, dep_var, regs_yr)
    if gls:
        row = {'Model': f'{model_label}: + Year FE', 'N': gls.n_obs,
               'Countries': gls.n_countries, 'R²': gls.r_squared}
        for i, v in enumerate(regs):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
        results.append(row)
        print(f"  {model_label} +Year FE: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}")

    # (3) Within transformation (country FE) + year dummies
    # Demean by country
    sub_dm = sub.copy()
    demean_cols = [dep_var] + regs
    for col in demean_cols:
        sub_dm[col] = sub_dm.groupby('iso3')[col].transform(lambda x: x - x.mean())

    year_dummies_dm = pd.get_dummies(sub['year'], prefix='yr', drop_first=True, dtype=float)
    # Demean year dummies too (within transformation)
    sub_dm_full = pd.concat([sub_dm.reset_index(drop=True),
                              year_dummies_dm.reset_index(drop=True)], axis=1)
    # Copy entity/time ids from original
    sub_dm_full['iso3'] = sub['iso3'].values
    sub_dm_full['year'] = sub['year'].values

    regs_dm = regs + yr_cols
    gls, _ = run_gls(sub_dm_full, dep_var, regs_dm)
    if gls:
        row = {'Model': f'{model_label}: Country FE + Year FE', 'N': gls.n_obs,
               'Countries': gls.n_countries, 'R²': gls.r_squared}
        for i, v in enumerate(regs):
            row[f'{v}_coef'] = gls.beta[i]
            row[f'{v}_se'] = gls.se[i]
            row[f'{v}_p'] = gls.pvalues[i]
        results.append(row)
        print(f"  {model_label} Country+Year FE: Z₁={gls.beta[0]:.4f} (p={gls.pvalues[0]:.4f}) {stars(gls.pvalues[0])}")

    return results

# Rating baseline
fe_results += run_fe_variants(df, 'rating_numeric',
                               demo_vars + controls_rating + ['govt_debt_gdp'],
                               'Rating')

# Spread baseline
fe_results += run_fe_variants(df, 'sovereign_spread',
                               demo_vars + controls_spread,
                               'Spread')

# Spread with debt interaction
df['Z_1_x_debt'] = df['Z_1'] * df['govt_debt_gdp']
fe_results += run_fe_variants(df, 'sovereign_spread',
                               demo_vars + controls_spread + ['govt_debt_gdp', 'Z_1_x_debt'],
                               'Spread+Debt')

# Save Part D
md = ["# Country FE + Year FE Robustness\n"]
md.append("Within transformation: country-demeaned variables approximate country fixed effects.\n")

# Key variables to report
key_fe_vars = demo_vars + ['govt_debt_gdp', 'Z_1_x_debt'] + controls_spread
md.append("| Model | N | Countries | R² |")
md.append("|---|---|---|---|")
for r in fe_results:
    md.append(f"| {r['Model']} | {r['N']:,} | {r['Countries']} | {r['R²']:.3f} |")

md.append("\n## Key Coefficients\n")
md.append("| Model | Variable | Coef | SE | p-value | Sig |")
md.append("|---|---|---|---|---|---|")
for r in fe_results:
    for v in demo_vars + ['govt_debt_gdp', 'Z_1_x_debt'] + controls_spread:
        ck = f'{v}_coef'
        if ck in r:
            p = r[f'{v}_p']
            md.append(f"| {r['Model']} | {v} | {r[ck]:.4f} | {r[f'{v}_se']:.4f} "
                      f"| {p:.4f} | {stars(p)} |")

out = TABLES_DIR / "referee_fe_robustness.md"
out.write_text('\n'.join(md))
print(f"\n  Saved: {out}")


# ═══════════════════════════════════════════════════════════════════════════
# PART E: MEDIATION DECOMPOSITION (Suppression Framing)
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART E: Mediation / Suppression Decomposition")
print("=" * 70)

mediation_results = []

# E1: Rating WITHOUT fiscal controls
regs_no_fiscal = demo_vars + ['rgdp_growth', 'kaopen']
gls1, sub1 = run_gls(df, 'rating_numeric', regs_no_fiscal)
if gls1:
    z1_no_fiscal = gls1.beta[0]
    z1_p_no_fiscal = gls1.pvalues[0]
    print(f"  E1: Rating w/o fiscal controls: Z₁={z1_no_fiscal:.4f} (p={z1_p_no_fiscal:.4f}) {stars(z1_p_no_fiscal)}")
    mediation_results.append({
        'Model': 'Rating (no fiscal)',
        'Z_1_coef': z1_no_fiscal, 'Z_1_p': z1_p_no_fiscal,
        'N': gls1.n_obs, 'R²': gls1.r_squared,
    })

# E2: Rating WITH fiscal controls (fiscal_bal_gdp only)
regs_with_fiscal = demo_vars + ['rgdp_growth', 'kaopen', 'fiscal_bal_gdp']
gls2, sub2 = run_gls(df, 'rating_numeric', regs_with_fiscal)
if gls2:
    z1_with_fiscal = gls2.beta[0]
    z1_p_with_fiscal = gls2.pvalues[0]
    print(f"  E2: Rating w/ fiscal_bal: Z₁={z1_with_fiscal:.4f} (p={z1_p_with_fiscal:.4f}) {stars(z1_p_with_fiscal)}")
    mediation_results.append({
        'Model': 'Rating (+ fiscal_bal)',
        'Z_1_coef': z1_with_fiscal, 'Z_1_p': z1_p_with_fiscal,
        'N': gls2.n_obs, 'R²': gls2.r_squared,
    })

# E3: Rating WITH debt_gdp
regs_with_debt = demo_vars + ['rgdp_growth', 'kaopen', 'fiscal_bal_gdp', 'govt_debt_gdp']
gls3, sub3 = run_gls(df, 'rating_numeric', regs_with_debt)
if gls3:
    z1_with_debt = gls3.beta[0]
    z1_p_with_debt = gls3.pvalues[0]
    print(f"  E3: Rating w/ fiscal+debt: Z₁={z1_with_debt:.4f} (p={z1_p_with_debt:.4f}) {stars(z1_p_with_debt)}")
    mediation_results.append({
        'Model': 'Rating (+ fiscal_bal + debt)',
        'Z_1_coef': z1_with_debt, 'Z_1_p': z1_p_with_debt,
        'N': gls3.n_obs, 'R²': gls3.r_squared,
    })

# E4: Correlations to explain the suppression pattern
sub_corr = df.dropna(subset=['Z_1', 'govt_debt_gdp', 'fiscal_bal_gdp', 'rating_numeric']).copy()
corr_z1_debt = sub_corr['Z_1'].corr(sub_corr['govt_debt_gdp'])
corr_z1_fiscal = sub_corr['Z_1'].corr(sub_corr['fiscal_bal_gdp'])
corr_z1_rating = sub_corr['Z_1'].corr(sub_corr['rating_numeric'])
corr_debt_rating = sub_corr['govt_debt_gdp'].corr(sub_corr['rating_numeric'])
print(f"\n  Correlations:")
print(f"    Z₁ ↔ debt/GDP:     {corr_z1_debt:+.3f}")
print(f"    Z₁ ↔ fiscal_bal:   {corr_z1_fiscal:+.3f}")
print(f"    Z₁ ↔ rating:       {corr_z1_rating:+.3f}")
print(f"    debt/GDP ↔ rating:  {corr_debt_rating:+.3f}")

if gls1 and gls3:
    change = z1_with_debt - z1_no_fiscal
    pct = 100 * change / abs(z1_no_fiscal) if z1_no_fiscal != 0 else np.nan
    direction = "INCREASES" if abs(z1_with_debt) > abs(z1_no_fiscal) else "decreases"
    print(f"\n  Z₁ coefficient {direction} from {z1_no_fiscal:.4f} to {z1_with_debt:.4f}")
    print(f"  Change: {change:+.4f} ({pct:+.1f}%)")
    if direction == "INCREASES":
        print("  → This is SUPPRESSION, not mediation.")
        print("    Older countries tend to be richer with higher debt but also better ratings.")
        print("    Controlling for debt removes the positive debt-rating correlation,")
        print("    isolating the pure aging penalty on ratings.")

# Save Part E
md = ["# Mediation / Suppression Decomposition\n"]
md.append("## Z₁ Coefficient Across Specifications\n")
md.append("| Model | Z₁ Coef | p-value | Sig | N | R² |")
md.append("|---|---|---|---|---|---|")
for r in mediation_results:
    md.append(f"| {r['Model']} | {r['Z_1_coef']:.4f} | {r['Z_1_p']:.4f} "
              f"| {stars(r['Z_1_p'])} | {r['N']:,} | {r['R²']:.3f} |")

md.append("\n## Correlations\n")
md.append("| Pair | Correlation |")
md.append("|---|---|")
md.append(f"| Z₁ ↔ govt_debt_gdp | {corr_z1_debt:+.3f} |")
md.append(f"| Z₁ ↔ fiscal_bal_gdp | {corr_z1_fiscal:+.3f} |")
md.append(f"| Z₁ ↔ rating_numeric | {corr_z1_rating:+.3f} |")
md.append(f"| govt_debt_gdp ↔ rating_numeric | {corr_debt_rating:+.3f} |")

md.append("\n## Interpretation\n")
if gls1 and gls3 and abs(z1_with_debt) > abs(z1_no_fiscal):
    md.append("The Z₁ coefficient **increases in magnitude** when fiscal controls are added. "
              "This is classical **suppression**, not mediation.\n")
    md.append(f"- Without fiscal controls: Z₁ = {z1_no_fiscal:.4f}")
    md.append(f"- With fiscal_bal + debt: Z₁ = {z1_with_debt:.4f}")
    md.append(f"- Change: {change:+.4f} ({pct:+.1f}%)\n")
    md.append("**Mechanism**: Older countries (high Z₁) tend to be richer OECD nations with "
              "higher debt-to-GDP but also structurally better credit ratings. "
              "The positive Z₁-debt correlation creates downward bias in the Z₁ coefficient. "
              "Controlling for debt removes this confound, revealing the true (larger) "
              "aging penalty on ratings.")

out = TABLES_DIR / "referee_mediation_proper.md"
out.write_text('\n'.join(md))
print(f"\n  Saved: {out}")


# ═══════════════════════════════════════════════════════════════════════════
# PART F: STRUCTURAL BREAK CLARIFICATION
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PART F: Structural Break Clarification (Sup-F vs Chow)")
print("=" * 70)

vars_break = [v for v in demo_vars + controls_rating + ['govt_debt_gdp'] if v in df.columns]
full = df.dropna(subset=['rating_numeric'] + vars_break).copy()
full = full[(full['year'] >= 1995) & (full['year'] <= 2022)]
print(f"  Break sample: {full['iso3'].nunique()} countries, {len(full):,} obs, "
      f"{full['year'].min()}-{full['year'].max()}")

# Helper: Chow F-stat
def chow_test(data, dep_var, regs, break_year):
    """Compute Chow test F-statistic for a given break year."""
    pre = data[data['year'] <= break_year]
    post = data[data['year'] > break_year]
    if len(pre) < 50 or len(post) < 50:
        return None

    k = len(regs)
    n = len(data)

    # Full sample
    gls_f = PanelGLS()
    gls_f.fit(data[dep_var].values, data[regs].values,
              data['iso3'].values, data['year'].values)
    rss_full = np.sum(gls_f.resid ** 2)

    # Pre
    gls_pre = PanelGLS()
    gls_pre.fit(pre[dep_var].values, pre[regs].values,
                pre['iso3'].values, pre['year'].values)
    rss_pre = np.sum(gls_pre.resid ** 2)

    # Post
    gls_post = PanelGLS()
    gls_post.fit(post[dep_var].values, post[regs].values,
                 post['iso3'].values, post['year'].values)
    rss_post = np.sum(gls_post.resid ** 2)

    denom = (rss_pre + rss_post) / (n - 2 * (k + 1))
    if denom <= 0:
        return None
    F = ((rss_full - rss_pre - rss_post) / (k + 1)) / denom
    p = 1 - stats.f.cdf(max(F, 0), k + 1, n - 2 * (k + 1))

    return {
        'break_year': break_year, 'F': F, 'p': p,
        'n_pre': len(pre), 'n_post': len(post),
        'Z1_pre': gls_pre.beta[0], 'Z1_post': gls_post.beta[0],
        'Z1_p_pre': gls_pre.pvalues[0], 'Z1_p_post': gls_post.pvalues[0],
    }

# (1) Sup-F: scan all breakpoints
print("\n  Sup-F scan (all breakpoints 1999-2016):")
f_stats = []
for by in range(1999, 2017):
    result = chow_test(full, 'rating_numeric', vars_break, by)
    if result:
        f_stats.append(result)
        sig = stars(result['p'])
        print(f"    {by}: F={result['F']:.2f}, p={result['p']:.4f} {sig}  "
              f"(Z₁_pre={result['Z1_pre']:.2f}, Z₁_post={result['Z1_post']:.2f})")

if f_stats:
    fdf = pd.DataFrame(f_stats)
    best = fdf.loc[fdf['F'].idxmax()]
    print(f"\n  ★ Sup-F breakpoint: {int(best['break_year'])} (F={best['F']:.2f}, p={best['p']:.4f})")

# (2) Chow at 2001
print("\n  Chow test at 2001:")
r_2001 = chow_test(full, 'rating_numeric', vars_break, 2001)
if r_2001:
    print(f"    F={r_2001['F']:.2f}, p={r_2001['p']:.4f} {stars(r_2001['p'])}")

# (3) Chow at 2008
print("\n  Chow test at 2008:")
r_2008 = chow_test(full, 'rating_numeric', vars_break, 2008)
if r_2008:
    print(f"    F={r_2008['F']:.2f}, p={r_2008['p']:.4f} {stars(r_2008['p'])}")

# (4) Interaction approach: Z₁ × post_break dummy
print("\n  Interaction approach (Z₁ × post-break dummy):")
interaction_breaks = []
for by in [2001, 2008, int(best['break_year']) if f_stats else 2008]:
    full_tmp = full.copy()
    full_tmp['post'] = (full_tmp['year'] > by).astype(float)
    full_tmp['Z_1_x_post'] = full_tmp['Z_1'] * full_tmp['post']
    regs_int = vars_break + ['post', 'Z_1_x_post']
    gls, _ = run_gls(full_tmp, 'rating_numeric', regs_int)
    if gls:
        # Find indices
        idx_z1 = 0  # Z_1 is first
        idx_post = len(vars_break)
        idx_int = len(vars_break) + 1
        print(f"    Break at {by}: Z₁={gls.beta[idx_z1]:.4f} {stars(gls.pvalues[idx_z1])}, "
              f"post={gls.beta[idx_post]:.4f} {stars(gls.pvalues[idx_post])}, "
              f"Z₁×post={gls.beta[idx_int]:.4f} {stars(gls.pvalues[idx_int])}")
        interaction_breaks.append({
            'break': by,
            'Z1_coef': gls.beta[idx_z1], 'Z1_p': gls.pvalues[idx_z1],
            'post_coef': gls.beta[idx_post], 'post_p': gls.pvalues[idx_post],
            'Z1xpost_coef': gls.beta[idx_int], 'Z1xpost_p': gls.pvalues[idx_int],
            'R²': gls.r_squared, 'N': gls.n_obs,
        })

# Save Part F
md = ["# Structural Break Clarification\n"]

md.append("## Sup-F Scan (All Breakpoints)\n")
md.append("| Break Year | F-stat | p-value | Sig | Z₁ (pre) | Z₁ (post) |")
md.append("|---|---|---|---|---|---|")
for r in f_stats:
    md.append(f"| {r['break_year']} | {r['F']:.2f} | {r['p']:.4f} | {stars(r['p'])} "
              f"| {r['Z1_pre']:.2f} | {r['Z1_post']:.2f} |")

if f_stats:
    md.append(f"\n**Sup-F breakpoint**: {int(best['break_year'])} (F={best['F']:.2f}, p={best['p']:.4f})\n")

md.append("## Specific Chow Tests\n")
md.append("| Break | F-stat | p-value | Sig |")
md.append("|---|---|---|---|")
if r_2001:
    md.append(f"| 2001 | {r_2001['F']:.2f} | {r_2001['p']:.4f} | {stars(r_2001['p'])} |")
if r_2008:
    md.append(f"| 2008 | {r_2008['F']:.2f} | {r_2008['p']:.4f} | {stars(r_2008['p'])} |")
if f_stats:
    md.append(f"| {int(best['break_year'])} (Sup-F) | {best['F']:.2f} | {best['p']:.4f} | {stars(best['p'])} |")

md.append("\n## Interaction Approach (Z₁ × post dummy)\n")
md.append("| Break | Z₁ | Sig | Post | Sig | Z₁×Post | Sig | R² | N |")
md.append("|---|---|---|---|---|---|---|---|---|")
for r in interaction_breaks:
    md.append(f"| {r['break']} | {r['Z1_coef']:.4f} | {stars(r['Z1_p'])} "
              f"| {r['post_coef']:.4f} | {stars(r['post_p'])} "
              f"| {r['Z1xpost_coef']:.4f} | {stars(r['Z1xpost_p'])} "
              f"| {r['R²']:.3f} | {r['N']:,} |")

md.append("\n## Interpretation\n")
md.append("The Sup-F test detects the breakpoint with **maximum** instability across all candidate "
          "years. The Chow test at a specific date (e.g. 2008) tests only that single break. "
          "The interaction approach tests whether Z₁'s effect changes **within** a single "
          "regression, but has lower power than the split-sample Chow test because it imposes "
          "a common error variance across regimes.\n")
md.append("When the Sup-F detects instability but the Z₁×post interaction is insignificant, "
          "this typically means the break affects **multiple** coefficients (not just Z₁), "
          "or the structural change is gradual rather than a sharp regime switch.")

out = TABLES_DIR / "referee_break_clarification.md"
out.write_text('\n'.join(md))
print(f"\n  Saved: {out}")


# ═══════════════════════════════════════════════════════════════════════════
# DONE
# ═══════════════════════════════════════════════════════════════════════════
print("\n" + "=" * 70)
print("PHASE 5 COMPLETE — All referee tables saved to output/tables/")
print("=" * 70)
